#coding:utf-8
# Extract the item ids from a Taobao shop's HTML page and store them in a file
import sys
reload(sys)
sys.setdefaultencoding("utf-8")
# Text that immediately precedes every item id in the shop page.
ITEM_URL_MARKER = '//detail.tmall.com/item.htm?id='


def extract_item_ids(html_text, marker=ITEM_URL_MARKER):
    """Return the item ids found in *html_text*, in order of appearance.

    An id is the text between an occurrence of *marker* and the next '&'
    (or the end of the input when no '&' follows). An id equal to the one
    kept just before it is skipped, so runs of consecutive duplicate links
    collapse to one entry; duplicates that are not adjacent are kept.

    Returns an empty list when *marker* does not occur at all.
    """
    # The first piece is whatever precedes the first marker, never an id.
    segments = html_text.split(marker)[1:]
    item_ids = []
    for segment in segments:
        item_id = segment.split('&')[0]
        # Compare against the last id kept, not the previous segment, so a
        # page with a single link still yields that one id.
        if not item_ids or item_id != item_ids[-1]:
            item_ids.append(item_id)
    return item_ids


# Ids collected from list.txt; stays empty when the file cannot be read.
result = []
try:
    # `with` closes the file even if read() fails.
    with open('list.txt') as file_object:
        all_the_text = file_object.read()
except IOError as msg:
    # A missing or unreadable input file is the failure that can actually
    # happen here; report it and carry on with an empty result.
    print(msg)
else:
    result = extract_item_ids(all_the_text)

# Append one mobile item URL per collected id to the output file.
if result:
    # Append mode keeps the URLs written by earlier runs; binary mode writes
    # "\n" without any newline translation.
    with open('get_all_url_temp.txt','ab+') as url_file:
        # One batched call instead of three small writes per id.
        url_file.writelines(
            'https://detail.m.tmall.com/item.htm?id=' + item_id + "\n"
            for item_id in result
        )